### Summary ####
# Input: df_f, raw CE files 
# Output: Summary statistics tables and figures in Section 2
# Outline: 
# Part 1: Summary statistics from the final sample
# 1. Table I Panel A Column I and II
# 2. Table I Panel B Column I and II
# 3. Figure III
# 4. Table II
# Part 2: Summary statistics from raw CE (all interviews of CU interviewed in May, Jun, or Jul)
# 5. Table I Panel A Column III and IV
# 6. Table I Panel B Column III and IV
# IMPORTANT: Run code out of order can lead to different results

### ******** Results from the final panel ******** ####

### Summary ####
# Input: df_f, cnt20
# Output: Summary statistics table I Column 1 and 2, Table II, and Figure III

setwd(getwd())

### Data Processing ####
## Open libraries 
library(readr) # For importing datasets
library(readxl) # For importing datasets
library(dplyr) # For data processing 
library(ggplot2) # For graphs
library(weights) # For weighted summary statistics
library(expss) # For frequency tables


## Import data sets
cnt20cl <- read.csv("cnt20_cleaned.csv") %>% select(NEWID,RYYMM,REBTUSED,CHCKEFT,EIPI)
df_f <- read.csv("df_f.csv")

## Find the list of NEWIDs in the panel that could have EIPs
df_jun <- df_f %>% filter(YYMM==2006) %>% select(ID, NEWID, FINLWT21_AVG)
df_jul <- df_f %>% filter(YYMM==2007) %>% select(ID, NEWID, FINLWT21_AVG)
df_sep <- df_f %>% filter(YYMM==2009) %>% select(ID, NEWID, FINLWT21_AVG)
df_oct <- df_f %>% filter(YYMM==2010) %>% select(ID, NEWID, FINLWT21_AVG)
df_dec <- df_f %>% filter(YYMM==2012) %>% select(ID, NEWID, FINLWT21_AVG)
df_jan <- df_f %>% filter(YYMM==2101) %>% select(ID, NEWID, FINLWT21_AVG)


# Since we consider the first lag, the previous interview of the Sep CUs should also be counted 
df_sep_lag <- df_sep
df_sep_lag$NEWID <- df_sep_lag$NEWID - 1

# similarly for the other months 
df_oct_lag <- df_oct
df_oct_lag$NEWID <- df_oct_lag$NEWID - 1

df_dec_lag <- df_dec
df_dec_lag$NEWID <- df_dec_lag$NEWID - 1

df_jan_lag <- df_jan 
df_jan_lag$NEWID <- df_jan_lag$NEWID - 1

# bind all newids together and drop repititions

NEWID_list <- bind_rows(df_jun,df_jul,df_sep,df_sep_lag,df_oct,df_oct_lag,
          df_dec,df_dec_lag,df_jan,df_jan_lag) %>% distinct(NEWID,.keep_all = TRUE)

EIP <- merge(NEWID_list,cnt20cl,by="NEWID")

# Checking interviews with multiple EIPs
# rb1 <- EIP  %>% group_by(NEWID) %>% filter( n() == 1)
# rb2 <- EIP  %>% group_by(NEWID) %>% filter( n() == 2)
# rb3 <- EIP  %>% group_by(NEWID) %>% filter( n() == 3)
# rb4 <- EIP  %>% group_by(NEWID) %>% filter( n() == 4)
# rb5 <- EIP  %>% group_by(NEWID) %>% filter( n() == 5)
# rb6 <- EIP  %>% group_by(NEWID) %>% filter( n() == 6)
# rb7 <- EIP  %>% group_by(NEWID) %>% filter( n() == 7)
# rb8 <- EIP  %>% group_by(NEWID) %>% filter( n() == 8)

# Aggregating to the CU-month level
# EIP_m <- EIP %>% group_by(NEWID,RYYMM) %>%
#   mutate(TEIPI = sum(EIPI)) %>% 
#   distinct(NEWID,RYYMM, .keep_all = TRUE) %>%
#   select(-c(EIPI))

### Table I Panel A Column I and II ####
#### Column I ####
fre(EIP$RYYMM)

#### Column II ####
fre(EIP$RYYMM, weight=EIP$FINLWT21_AVG)

### Table I Panel B Column I and II ####

df_f_cu <- df_f %>% distinct(ID, .keep_all=TRUE)

# These are the non-recipients 
df_f_cu_nr <- df_f_cu %>% filter(r==0)

#### Column I ####
435/2558
length(df_f_cu_nr$ID)/length(df_f_cu$ID)

#### Column II ####
sum(df_f_cu_nr$FINLWT21_AVG)/sum(df_f_cu$FINLWT21_AVG)

### Table II ####

fre(EIP$CHCKEFT, weight=EIP$FINLWT21_AVG)


fre(EIP$REBTUSED, weight=EIP$FINLWT21_AVG)

### Table C.1 ####

# Aggregate to CU-3months level
EIP_Q <- EIP %>%
  group_by(NEWID) %>%
  mutate(EIPI_t=sum(EIPI)) %>%
  distinct(NEWID,.keep_all=TRUE) %>%
  ungroup()

table(cut(EIP_Q$EIPI_t,breaks=c(1,1199.9,1200.1,1699.9,1700.1,2399.9,2400.1,2899.9,2900.1,
                                3399.9,3400.1,3899.9,3900.1,100000)))


# Find the number of non-recipients in the panel

df_f_cu_p <- df_f %>% group_by(ID) %>%
  mutate(EIPI_t_total=sum(EIPI_t))

df_f_cu_p <- df_f_cu_p %>% group_by(ID) %>%
  mutate(EIPI_tm1_total=sum(EIPI_tm1))

df_f_cu_p$EIPI_total <- df_f_cu_p$EIPI_t_total + df_f_cu_p$EIPI_tm1_total

df_f_cu_p <- df_f_cu_p %>% distinct(ID, .keep_all=TRUE)

df_f_cu_nr_p <- df_f_cu_p %>% filter(EIPI_total==0)

# Note that we know that there are 498 non-recipients, so EIP = 0
498/2620
# 0<EIP<1200
99/2620
# EIP = 1200
763/2620
# 1200 < EIP < 1700
43/2620
# EIP = 1700
43/2620
# 1700 < EIP < 2400
108/2620
# EIP = 2400
626/2620
# 2400 < EIP < 2900
30/2620
# EIP = 2900
104/2620
# 2900 < EIP < 3400
21/2620
# EIP = 3400
71/2620
# 3400 < EIP < 3900
91/2620
# EIP = 3900
40/2620
# EIP > 3900
83/2620

# weighted numbers 
sum(df_f_cu_nr_p$FINLWT21_AVG)
# 0<EIP<1200
EIP_Q_1 <- EIP_Q %>% filter(EIPI_t>0 & EIPI_t <1200)
sum(EIP_Q_1$FINLWT21_AVG)
# EIP = 1200
EIP_Q_2 <- EIP_Q %>% filter(EIPI_t==1200)
sum(EIP_Q_2$FINLWT21_AVG)
# 1200 < EIP < 1700
EIP_Q_3 <- EIP_Q %>% filter(EIPI_t>1200 & EIPI_t <1700)
sum(EIP_Q_3$FINLWT21_AVG)
# EIP = 1700
EIP_Q_4 <- EIP_Q %>% filter(EIPI_t==1700)
sum(EIP_Q_4$FINLWT21_AVG)
# 1700 < EIP < 2400
EIP_Q_5 <- EIP_Q %>% filter(EIPI_t>1700 & EIPI_t <2400)
sum(EIP_Q_5$FINLWT21_AVG)
# EIP = 2400
EIP_Q_6 <- EIP_Q %>% filter(EIPI_t==2400)
sum(EIP_Q_6$FINLWT21_AVG)
# 2400 < EIP < 2900
EIP_Q_7 <- EIP_Q %>% filter(EIPI_t>2400 & EIPI_t <2900)
sum(EIP_Q_7$FINLWT21_AVG)
# EIP = 2900
EIP_Q_8 <- EIP_Q %>% filter(EIPI_t==2900)
sum(EIP_Q_8$FINLWT21_AVG)
# 2900 < EIP < 3400
EIP_Q_9 <- EIP_Q %>% filter(EIPI_t>2900 & EIPI_t <3400)
sum(EIP_Q_9$FINLWT21_AVG)
# EIP = 3400
EIP_Q_10 <- EIP_Q %>% filter(EIPI_t==3400)
sum(EIP_Q_10$FINLWT21_AVG)
# 3400 < EIP < 3900
EIP_Q_11 <- EIP_Q %>% filter(EIPI_t>3400 & EIPI_t <3900)
sum(EIP_Q_11$FINLWT21_AVG)
# EIP = 3900
EIP_Q_12 <- EIP_Q %>% filter(EIPI_t==3900)
sum(EIP_Q_12$FINLWT21_AVG)
# EIP > 3900
EIP_Q_13 <- EIP_Q %>% filter(EIPI_t>3900)
sum(EIP_Q_13$FINLWT21_AVG)

sum(df_f_cu_nr_p$FINLWT21_AVG) + sum(EIP_Q_1$FINLWT21_AVG) + sum(EIP_Q_2$FINLWT21_AVG)+
  sum(EIP_Q_3$FINLWT21_AVG) + sum(EIP_Q_4$FINLWT21_AVG) + sum(EIP_Q_5$FINLWT21_AVG) + sum(EIP_Q_6$FINLWT21_AVG) + 
  sum(EIP_Q_7$FINLWT21_AVG) + sum(EIP_Q_8$FINLWT21_AVG) + sum(EIP_Q_9$FINLWT21_AVG) + sum(EIP_Q_10$FINLWT21_AVG) + 
  sum(EIP_Q_11$FINLWT21_AVG) + sum(EIP_Q_12$FINLWT21_AVG) + sum(EIP_Q_13$FINLWT21_AVG)

sum(df_f_cu_nr_p$FINLWT21_AVG)/67505928
sum(EIP_Q_1$FINLWT21_AVG)/67505928
sum(EIP_Q_2$FINLWT21_AVG)/67505928
sum(EIP_Q_3$FINLWT21_AVG)/67505928
sum(EIP_Q_4$FINLWT21_AVG)/67505928
sum(EIP_Q_5$FINLWT21_AVG)/67505928
sum(EIP_Q_6$FINLWT21_AVG)/67505928
sum(EIP_Q_7$FINLWT21_AVG)/67505928
sum(EIP_Q_8$FINLWT21_AVG)/67505928
sum(EIP_Q_9$FINLWT21_AVG)/67505928
sum(EIP_Q_10$FINLWT21_AVG)/67505928
sum(EIP_Q_11$FINLWT21_AVG)/67505928
sum(EIP_Q_12$FINLWT21_AVG)/67505928
sum(EIP_Q_13$FINLWT21_AVG)/67505928


# average EIP 
summary(EIP_Q$EIPI_t)
weighted.mean(EIP_Q$EIPI_t,EIP_Q$FINLWT21_AVG)

sd(EIP_Q$EIPI_t)


### Table C.5 ####
df_f$MARITAL_t <- ifelse(df_f$MARITAL1_t == 1, 1, 0)

# For single, without kids

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 100000 & FINCBTXM_FST > 75000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 125000 & FINCBTXM_FST > 100000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 150000 & FINCBTXM_FST > 125000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 175000 & FINCBTXM_FST > 150000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 200000 & FINCBTXM_FST > 175000)
table(check$r)

# For single, with kids

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 150000 & FINCBTXM_FST > 125000)
table(check$r)
#
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 175000 & FINCBTXM_FST > 150000)
table(check$r)
#
check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 200000 & FINCBTXM_FST > 175000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 225000 & FINCBTXM_FST > 200000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 250000 & FINCBTXM_FST > 225000)
table(check$r)

# For married, no kids
check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 200000 & FINCBTXM_FST > 175000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 225000 & FINCBTXM_FST > 200000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 250000 & FINCBTXM_FST > 225000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 275000 & FINCBTXM_FST > 250000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 300000 & FINCBTXM_FST > 275000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 325000 & FINCBTXM_FST > 300000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 350000 & FINCBTXM_FST > 325000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 375000 & FINCBTXM_FST > 350000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 400000 & FINCBTXM_FST > 375000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 425000 & FINCBTXM_FST > 400000)
table(check$r)

# For married, with kids
check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 225000 & FINCBTXM_FST > 200000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 250000 & FINCBTXM_FST > 225000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 275000 & FINCBTXM_FST > 250000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 300000 & FINCBTXM_FST > 275000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 325000 & FINCBTXM_FST > 300000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 350000 & FINCBTXM_FST > 325000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 375000 & FINCBTXM_FST > 350000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 400000 & FINCBTXM_FST > 375000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 425000 & FINCBTXM_FST > 400000)
table(check$r)

# For adults, no kids

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 225000 & FINCBTXM_FST > 200000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 250000 & FINCBTXM_FST > 225000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 275000 & FINCBTXM_FST > 250000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 300000 & FINCBTXM_FST > 275000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 325000 & FINCBTXM_FST > 300000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 350000 & FINCBTXM_FST > 325000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 375000 & FINCBTXM_FST > 350000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 400000 & FINCBTXM_FST > 375000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 425000 & FINCBTXM_FST > 400000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 450000 & FINCBTXM_FST > 425000)
table(check$r)

# For adults, with kids

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 250000 & FINCBTXM_FST > 225000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 275000 & FINCBTXM_FST > 250000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 300000 & FINCBTXM_FST > 275000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 325000 & FINCBTXM_FST > 300000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 350000 & FINCBTXM_FST > 325000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 375000 & FINCBTXM_FST > 350000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 400000 & FINCBTXM_FST > 375000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 425000 & FINCBTXM_FST > 400000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 450000 & FINCBTXM_FST > 425000)
table(check$r)


### ******** Results from raw CE ******** ####

### Summary ####
# Input: cnt20 and fmli193 -- fmli204
# Output: Summary statistics table I column 3 and 4

### Data Processing ####
# Import datasets
cnt20 <- read_excel("Raw data/cnt20.xlsx") %>% filter(CONTCODE==800)
fmli193 <- read_excel("Raw data/fmli193.xlsx") 
fmli194 <- read_excel("Raw data/fmli194.xlsx") 
fmli201 <- read_excel("Raw data/fmli201.xlsx") 
fmli202 <- read_excel("Raw data/fmli202.xlsx") 
fmli203 <- read_excel("Raw data/fmli203.xlsx") 
fmli204 <- read_excel("Raw data/fmli204.xlsx") 
fmli211 <- read_excel("Raw data/fmli211.xlsx") %>% filter(QINTRVMO==1) 

####  Obtain interviews with rebates from cnt20
cnt20_rc <- cnt20  %>%
  # rename CONTMO
  mutate(RYYMM = ifelse(CONTMO==4,2004,
                        ifelse(CONTMO==5,2005,
                               ifelse(CONTMO==6,2006,
                                      ifelse(CONTMO==7,2007,
                                             ifelse(CONTMO==8,2008,
                                                    ifelse(CONTMO==9,2009,
                                                           ifelse(CONTMO==10,2010,
                                                                  ifelse(CONTMO==11,2011,
                                                                         ifelse(CONTMO==12,2012,
                                                                                ifelse(CONTMO==1,2101,2102))))))))))) %>% 
  select(-CONTMO) %>% 
  # Drop EIPII in cnt20_rc 
  filter(RYYMM!=2012 & RYYMM!=2101 & RYYMM!=2102) %>%
  select(NEWID,RYYMM,CHCKEFT,REBTUSED,CONTEXPX) %>%
  # Rename CONTEXPX
  rename(EIPI = CONTEXPX)

#### Obtain all interviews from fmli's
fmli <- rbind(fmli202, fmli203, fmli204, fmli211) %>% 
  select(NEWID,QINTRVMO) %>%
  mutate(YYMM=ifelse(QINTRVMO==4,2004,
                     ifelse(QINTRVMO==5, 2005,
                            ifelse(QINTRVMO==6, 2006,
                                   ifelse(QINTRVMO==7,2007,
                                          ifelse(QINTRVMO==8,2008,
                                                 ifelse(QINTRVMO==9,2009,
                                                        ifelse(QINTRVMO==10,2010,
                                                               ifelse(QINTRVMO==11,2011,
                                                                      ifelse(QINTRVMO==12,2012,2101)))))))))) %>%
  filter(YYMM!=2004) %>% 
  select(-c(QINTRVMO))

fmli_flt <- fmli %>% 
  select (NEWID) %>% 
  mutate(
    RYYMM = NA,
    CHCKEFT = NA,
    REBTUSED = NA,
    EIPI = 0
  )

#### Obtain interview without rebates 
# cnt20_nr contains all interviews without rebates reported 
cnt20_nr <- fmli_flt %>% filter(!(NEWID %in% cnt20_rc$NEWID))

# Merge to form a cnt20_f that contains all information about rebates
cnt20_f <- rbind(cnt20_rc,cnt20_nr)

#### Merge df with fmli (April to Jan 2021 interviews) 
# df now contains all EIPI information as well as other info already in fmli

# Note that cnt20_f has 92 more CUs than fmli
# These are November rebates reported in Feb 2021
# They are dropped after merge

df <- merge(fmli,cnt20_f,by="NEWID")

####  Keep only CUs that are interviewed in May, June, and July
df <- df %>% mutate(
  ID = substr(as.character(NEWID),1,6))

df_all <- df

may_list <- df %>% filter(YYMM==2005) %>% select(ID)
jun_list <- df %>% filter(YYMM==2006) %>% select(ID)
jul_list <- df %>% filter(YYMM==2007) %>% select(ID)

df <- df %>%
  filter(ID %in% may_list$ID | ID %in% jun_list$ID | ID %in% jul_list$ID) 

#### Obtain weights 
fmli193_wts <- fmli193 %>% select(NEWID,FINLWT21)
fmli194_wts <- fmli194 %>% select(NEWID,FINLWT21)
fmli201_wts <- fmli201 %>% select(NEWID,FINLWT21)
fmli202_wts <- fmli202 %>% select(NEWID,FINLWT21)
fmli203_wts <- fmli203 %>% select(NEWID,FINLWT21)
fmli204_wts <- fmli204 %>% select(NEWID,FINLWT21)
fmli211_wts <- fmli211 %>% select(NEWID,FINLWT21)

ID_creator <- function(fmli){
  fmli_weights_income <- fmli %>% 
    mutate(
      ID = substr(as.character(NEWID),1,6),
    ) %>% select(ID, FINLWT21)
  return(fmli_weights_income)}

fmli193_wts<- ID_creator(fmli193)
fmli194_wts <- ID_creator(fmli194)
fmli201_wts <- ID_creator(fmli201)
fmli202_wts <- ID_creator(fmli202)
fmli203_wts <- ID_creator(fmli203)
fmli204_wts <- ID_creator(fmli204)
fmli211_wts <- ID_creator(fmli211) 

# Merge to obtain the weights, income, and liquidity in each interview 
wts <- merge(fmli193_wts,fmli194_wts,by="ID",all=TRUE)

wts <- wts %>% rename(
  FINLWT21_193 = FINLWT21.x, FINLWT21_194 = FINLWT21.y)

wts <- merge(wts,fmli201_wts, by="ID", all=TRUE)
wts <- wts %>% rename(
  FINLWT21_201 = FINLWT21)

wts <- merge(wts,fmli202_wts,by="ID",all=TRUE)
wts <- wts %>% rename(
  FINLWT21_202 = FINLWT21)

wts <- merge(wts,fmli203_wts,by="ID",all=TRUE)
wts <- wts %>% rename(
  FINLWT21_203 = FINLWT21)

wts <- merge(wts,fmli204_wts,by="ID",all=TRUE)
wts <- wts %>% rename(
  FINLWT21_204 = FINLWT21)

wts <- merge(wts,fmli211_wts,by="ID",all=TRUE)
wts <- wts %>% rename(
  FINLWT21_211 = FINLWT21)

# Average weights 
wts$FINLWT21_AVG <- rowMeans(wts[,c("FINLWT21_193",
                                    "FINLWT21_194",
                                    "FINLWT21_201",
                                    "FINLWT21_202",
                                    "FINLWT21_203",
                                    "FINLWT21_204",
                                    "FINLWT21_211")], 
                             na.rm=TRUE)

wts <- wts %>% select(ID,FINLWT21_AVG)

#### merge with weights 
df <- merge(df,wts,by="ID")

#### find only interviews with rebates
df_r <- df %>% filter(EIPI>0)

#### Aggregating to the CU-month level
# df_r <- df_r %>% group_by(NEWID,RYYMM) %>%
#   mutate(TEIPI = sum(EIPI)) %>% 
#   distinct(NEWID,RYYMM, .keep_all = TRUE) %>%
#   select(-c(EIPI))

### Table I Panel A Column III and IV ####

#### Column III ####
fre(df_r$RYYMM)
1505*1.5 / (1505*1.5+1499+377+81+25+6+1+2)
1499 / (1505*1.5+1499+377+81+25+6+1+2)
377 / (1505*1.5+1499+377+81+25+6+1+2)
(81+25+6+1+2) / (1505*1.5+1499+377+81+25+6+1+2)

#### Column IV ####
fre(df_r$RYYMM,weight=df_r$FINLWT21_AVG)

39976979.3*1.5 / (39976979.3*1.5 + 38065333.7 + 9940881.9 + 2058854.5 + 560299.1 + 191308.6 + 29857.3 + 46310.2)
38065333.7 / (39976979.3*1.5 + 38065333.7 + 9940881.9 + 2058854.5 + 560299.1 + 191308.6 + 29857.3 + 46310.2)
9940881.9 / (39976979.3*1.5 + 38065333.7 + 9940881.9 + 2058854.5 + 560299.1 + 191308.6 + 29857.3 + 46310.2)
(2058854.5 + 560299.1 + 191308.6 + 29857.3 + 46310.2)/(39976979.3*1.5 + 38065333.7 + 9940881.9 + 2058854.5 + 560299.1 + 191308.6 + 29857.3 + 46310.2)

### Table I Panel B Column III and IV ####

df_cu <- df %>%
  filter(YYMM != 2003 & YYMM!=2004) %>%
  group_by(ID) %>%
  arrange(YYMM, .by_group = TRUE) %>%
  mutate(TotalEIPI=sum(EIPI),
         r = ifelse(TotalEIPI>0,1,0)) %>% 
  distinct(ID,.keep_all = TRUE)

# Percent of non-recipients (unweighted)
# 50% of EIPs received in April reported by June and July interviews are 1413/2 = 706.5
# There are in total 1641 May interviews remains in the cnt20_f

# Generating 1641 random numbers (0 or 1) with 707
# to indicate whether a May interview gets a payment in apr

may_cu <- df_cu %>% filter(YYMM==2005)
jun_cu <- df_cu %>% filter(YYMM==2006)
jul_cu <- df_cu %>% filter(YYMM==2007)

may_apr_r_ran <- rep(1,707)
may_apr_nr_ran <- rep(0,934)
may_apr_ran <- c(may_apr_r_ran,may_apr_nr_ran)
# Create a permutation
set.seed(730947)
may_apr_ran <- sample(may_apr_ran)

# randomly assign those generated EIPs to May recipients 

may_cu$draw <- may_apr_ran
may_cu <- may_cu %>% 
  mutate(draw = draw + r,
         r = ifelse(draw>=1,1,0)) %>% 
  select(-(draw))

df_cu <- rbind(may_cu,jun_cu,jul_cu)

df_cu_nr <- df_cu %>% filter(r==0)

#### Column III ####
1165/4720

#### Column IV ####
sum(df_cu_nr$FINLWT21_AVG)/sum(df_cu$FINLWT21_AVG)

### Total number and dollar amount of EIPs ####
#### merge with weights 
df_all <- merge(df_all,wts,by="ID")

#### Find only interviews with rebates
df_all_r <- df_all %>% filter(EIPI>0)

# Scale up weights for April payments
df_all_r <- df_all_r %>%
  mutate(FINLWT21_AVG_NEW = ifelse(RYYMM==2004,FINLWT21_AVG*1.5,FINLWT21_AVG))

# Total number of rebates
fre(df_all_r$RYYMM,weight=df_all_r$FINLWT21_AVG_NEW)

agg_eip <- df_all_r %>% 
  group_by(RYYMM) %>% 
  summarise(eip_payments = round(sum(FINLWT21_AVG_NEW))) %>% 
  mutate(year = 2000 + floor(RYYMM/100),
         month = RYYMM - floor(RYYMM/100)*100) %>% 
  select(year,month,eip_payments)

write_csv(agg_eip,"agg_eipi.csv")

# Total amount of payments
sum(df_all_r$EIPI*df_all_r$FINLWT21_AVG_NEW)


### Numbers in text ####
## Chetty et al. MPC calculation
estimate = (25.15 + 8.45)/200 # average of top & bottom quartile estimates
df_all_jan <- read_csv("df_all_cu.csv") %>% filter(YYMM==2001)
avg_daily_spend = wtd.mean(df_all_jan$EX_T_t,weights = df_all_jan$FINLWT21_AVG)/90

mpc = avg_daily_spend*14*estimate/(0.75*1715.15) #average eip payment scaled by 75% receipt rate
mpc


## Average expenditure for Table III
full_df <- bind_rows(read_csv("df_f.csv"), read_csv("../CE EIP II/df_f.csv"), read_csv("../CE EIP III/df_f.csv"))
full_df <- full_df %>% distinct(ID,YYMM,.keep_all = TRUE)

wtd.mean(full_df$EX_T_t,weights=full_df$FINLWT21_AVG)
wtd.mean(full_df$EX_SN_t,weights=full_df$FINLWT21_AVG)
wtd.mean(full_df$EX_N_t,weights=full_df$FINLWT21_AVG)
wtd.mean(full_df$EX_FD_t,weights=full_df$FINLWT21_AVG)

